{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "inputHidden": false,
    "outputHidden": false
   },
   "source": [
    "# Profiler performance\n",
    "\n",
    "We use part of the Instacart data set, which you can find here: https://www.instacart.com/datasets/grocery-shopping-2017\n",
    "\n",
    "Specifically, we use `order_products__prior.csv`, a CSV file with 4 columns and 32.4 million rows.\n",
    "\n",
    "It took 355.58 seconds to process the whole data set on a Windows 10 machine.\n",
    "Instacart data\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"..\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Just check that Spark and all necessary environments vars are present...\n",
      "-----\n",
      "SPARK_HOME=C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\n",
      "HADOOP_HOME=C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\n",
      "You don't have PYSPARK_PYTHON set\n",
      "You don't have PYSPARK_DRIVER_PYTHON set\n",
      "JAVA_HOME=C:\\Program Files\\Java\\jdk1.8.0_181\n",
      "Pyarrow Installed\n",
      "-----\n",
      "Starting or getting SparkSession and SparkContext...\n",
      "\n",
      "                             ____        __  _                     \n",
      "                            / __ \\____  / /_(_)___ ___  __  _______\n",
      "                           / / / / __ \\/ __/ / __ `__ \\/ / / / ___/\n",
      "                          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) \n",
      "                          \\____/ .___/\\__/_/_/ /_/ /_/\\__,_/____/  \n",
      "                              /_/                                  \n",
      "                              \n",
      "Transform and Roll out...\n",
      "Setting checkpoint folder local. If you are in a cluster initialize Optimus with master='your_ip' as param\n",
      "Deleting previous folder if exists...\n",
      "Creating the checkpoint directory...\n",
      "Optimus successfully imported. Have fun :).\n"
     ]
    }
   ],
   "source": [
    "# Create the Optimus entry point (starts or reuses a local Spark session)\n",
    "from optimus import Optimus\n",
    "op = Optimus(master=\"local[*]\", app_name = \"optimus\" ,verbose =True, checkpoint= True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Benchmark "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = op.load.csv(\"C:\\\\Users\\\\argenisleon\\\\Desktop\\\\order_products__prior.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       "    .data_type {\n",
       "        font-size: 0.8em;\n",
       "        font-weight: normal;\n",
       "    }\n",
       "\n",
       "    .column_name {\n",
       "        font-size: 1.2em;\n",
       "    }\n",
       "\n",
       "    .info_items {\n",
       "        margin: 10px 0;\n",
       "        font-size: 0.8em;\n",
       "    }\n",
       "\n",
       "    .optimus_table tr:nth-child(even) {\n",
       "        background-color: #f2f2f2 !important;\n",
       "    }\n",
       "\n",
       "    .optimus_table tr:nth-child(odd) {\n",
       "        background-color: #ffffff !important;\n",
       "    }\n",
       "\n",
       "    .optimus_table thead {\n",
       "        border-bottom: 1px solid black;\n",
       "    }\n",
       "    .optimus_table{\n",
       "        font-size: 12px;\n",
       "    }\n",
       "\n",
       "</style>\n",
       "\n",
       "\n",
       "\n",
       "\n",
       "<div class=\"info_items\">Viewing 100 of 32.4 million rows / 4 columns</div>\n",
       "<div class=\"info_items\">8 partition(s)</div>\n",
       "\n",
       "<table class=\"optimus_table\">\n",
       "    <thead>\n",
       "    <tr>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">order_id</div>\n",
       "            <div class=\"data_type\">1 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">product_id</div>\n",
       "            <div class=\"data_type\">2 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">add_to_cart_order</div>\n",
       "            <div class=\"data_type\">3 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">reordered</div>\n",
       "            <div class=\"data_type\">4 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "    </tr>\n",
       "\n",
       "    </thead>\n",
       "    <tbody>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            33120\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            28985\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9327\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            45918\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            30035\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            17794\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            40141\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1819\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            8\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            43668\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            33754\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            24838\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            17704\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            21903\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            17668\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            46667\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            17461\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            32665\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            8\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            46842\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            26434\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            39758\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            27761\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            10054\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            21351\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            22598\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            34862\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            8\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            40285\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            17616\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            25146\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            11\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            32645\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            12\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            41276\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            13\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            13176\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            15005\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            47329\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            27966\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            23909\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            48370\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            13245\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9633\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            8\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            27360\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6348\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            40878\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            11\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6184\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            12\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            48002\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            13\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            20914\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            14\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            37011\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            15\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            12962\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            16\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            45698\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            17\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            24773\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            18\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            18569\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            19\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            41176\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            20\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            48366\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            21\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            47209\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            22\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            46522\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            23\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            38693\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            24\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            48825\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            25\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            8479\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            26\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            40462\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            15873\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            41897\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            34050\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            46802\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            8\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            23423\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            21405\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            47890\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            11182\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2014\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            29193\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            34203\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            14992\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            31506\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            8\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            23288\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            44533\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            18362\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            11\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            27366\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            12\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            432\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            13\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3990\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            14\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            14183\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            15\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            24852\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4796\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            31717\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            47766\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4605\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1529\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            21137\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            22122\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            8\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            34134\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            27156\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            14992\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            11\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            49235\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            12\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            26842\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            13\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3464\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            14\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            10\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            25720\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            15\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            11\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            30162\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            11\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            27085\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            11\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5994\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            11\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1313\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            11\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            31506\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            12\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            30597\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            12\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            15221\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            12\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            43772\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    </tbody>\n",
       "</table>\n",
       "\n",
       "<div class=\"info_items\">Viewing 100 of 32.4 million rows / 4 columns</div>\n",
       "<div class=\"info_items\">8 partition(s)</div>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Render a preview of the loaded DataFrame as an HTML table; the recorded\n",
    "# output below shows 100 of ~32.4 million rows across 4 columns (8 partitions).\n",
    "df.table()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing column 'order_id'...\n",
      "_count_data_types() executed in 18.69 sec\n",
      "count_data_types() executed in 18.69 sec\n",
      "cast_columns() executed in 0.01 sec\n",
      "_exprs() executed in 16.04 sec\n",
      "general_stats() executed in 16.05 sec\n",
      "------------------------------\n",
      "Processing column 'order_id'...\n",
      "frequency() executed in 23.65 sec\n",
      "stats_by_column() executed in 8.83 sec\n",
      "percentile() executed in 12.21 sec\n",
      "extra_numeric_stats() executed in 37.45 sec\n",
      "bucketizer() executed in 0.29 sec\n",
      "hist() executed in 14.6 sec\n",
      "dataset_info() executed in 22.43 sec\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style>\n",
       "    .main{\n",
       "        width:100%;\n",
       "        overflow:auto;\n",
       "        border-bottom:1px solid #eeeeee;\n",
       "        padding: 10px 0;\n",
       "    }\n",
       "    .panel_profiler{\n",
       "        margin-right:2%;\n",
       "        float:left;\n",
       "        padding-bottom:2%;\n",
       "    }\n",
       "    .title_profiler{\n",
       "        padding:20px;\n",
       "        background-color: #eeeeee\n",
       "    }\n",
       "    .info{\n",
       "        overflow: auto\n",
       "    }\n",
       "\n",
       "\n",
       "</style>\n",
       "\n",
       "<div class=\"title_profiler\">\n",
       "    <h1>Overview</h1>\n",
       "</div>\n",
       "<div class=\"main\">\n",
       "    <div class=\"panel_profiler\">\n",
       "        <h2>Dataset info</h2>\n",
       "        <table>\n",
       "            <tbody>\n",
       "            <tr>\n",
       "                <td>Number of columns</td>\n",
       "                <td>4</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Number of rows</td>\n",
       "                <td>32434489</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Total Missing (%)</td>\n",
       "                <td>0.0%</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Total size in memory</td>\n",
       "                <td>188.4 MB</td>\n",
       "\n",
       "            </tr>\n",
       "            </tbody>\n",
       "        </table>\n",
       "    </div>\n",
       "    <div class=\"panel_profiler\">\n",
       "        <h2>Column types</h2>\n",
       "        <table>\n",
       "            <tbody>\n",
       "            <tr>\n",
       "                <td>String</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Numeric</td>\n",
       "                <td>1</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Date</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Bool</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "             <tr>\n",
       "                <td>Array</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Not available</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "            </tbody>\n",
       "        </table>\n",
       "    </div>\n",
       "</div><style>\n",
       "    .main{\n",
       "        width:100%;\n",
       "        overflow:auto;\n",
       "        border-bottom:1px solid #eeeeee;\n",
       "        padding: 10px 0;\n",
       "    }\n",
       "    .panel_profiler{\n",
       "        margin-right:2%;\n",
       "        float:left;\n",
       "        padding-bottom:2%;\n",
       "    }\n",
       "    .title_profiler{\n",
       "        padding:20px;\n",
       "        background-color: #eeeeee\n",
       "    }\n",
       "    .info{\n",
       "        overflow: auto\n",
       "    }\n",
       "\n",
       "\n",
       "\n",
       "</style>\n",
       "\n",
       "<div class=\"main\">\n",
       "    <div class=\"info\">\n",
       "\n",
       "        \n",
       "\n",
       "        <div class=\"panel_profiler\">\n",
       "            <div>\n",
       "                <h2>order_id</h2>\n",
       "                <div>numeric</div>\n",
       "            </div>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>Unique</td>\n",
       "                    <td> 3025302</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Unique (%)</td>\n",
       "                    <td> 9.327</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Missing</td>\n",
       "                    <td>0.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Missing (%)</td>\n",
       "                    <td>0</td>\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "            <div>\n",
       "                <h3>\n",
       "                    Datatypes\n",
       "                </h3>\n",
       "            </div>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        String\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Integer\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        32434489\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Float\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Bool\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Date\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Missing\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Null\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "            \n",
       "            <div>\n",
       "                <h3>\n",
       "                    Basic Stats\n",
       "                </h3>\n",
       "\n",
       "            </div>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>Mean</td>\n",
       "                    <td>1710748.5189427834</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Minimum</td>\n",
       "                    <td>2</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Maximum</td>\n",
       "                    <td>3421083</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Zeros(%)</td>\n",
       "                    <td>0</td>\n",
       "                </tr>\n",
       "\n",
       "                </tbody>\n",
       "            </table>\n",
       "            \n",
       "\n",
       "        </div>\n",
       "        <div class=\"panel_profiler\">\n",
       "            <h3>Frequency</h3>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <th>Value</th>\n",
       "                <th>Count</th>\n",
       "                <th>Frequency (%)</th>\n",
       "                \n",
       "                <tr>\n",
       "                    <td>1564244</td>\n",
       "                    <td>145</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>790903</td>\n",
       "                    <td>137</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>61355</td>\n",
       "                    <td>127</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2970392</td>\n",
       "                    <td>121</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2069920</td>\n",
       "                    <td>116</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>3308010</td>\n",
       "                    <td>115</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2753324</td>\n",
       "                    <td>114</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2499774</td>\n",
       "                    <td>112</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2621625</td>\n",
       "                    <td>109</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>77151</td>\n",
       "                    <td>109</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>\"Missing\"</td>\n",
       "                    <td>0</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </div>\n",
       "        \n",
       "\n",
       "        \n",
       "        <div class=\"panel_profiler\">\n",
       "\n",
       "\n",
       "            <h3>Quantile statistics</h3>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>Minimum</td>\n",
       "                    <td>2</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>5-th percentile</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Q1</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Median</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Q3</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>95-th percentile</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Maximum</td>\n",
       "                    <td>3421083</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Range</td>\n",
       "                    <td>3421081</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Interquartile range</td>\n",
       "                    <td>0.0</td>\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </div>\n",
       "        <div class=\"panel_profiler\">\n",
       "            <h3>Descriptive statistics</h3>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>Standard deviation</td>\n",
       "                    <td>987300.6964529774</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Coef of variation</td>\n",
       "                    <td>0.57712</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Kurtosis</td>\n",
       "                    <td>-1.199128348852751</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Mean</td>\n",
       "                    <td>1710748.5189427834</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>MAD</td>\n",
       "                    <td>0.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Skewness</td>\n",
       "                    <td>0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Sum</td>\n",
       "                    <td>55487254019416</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Variance</td>\n",
       "                    <td>974762665216.534</td>\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </div>\n",
       "        \n",
       "    </div>\n",
       "    <table>\n",
       "        \n",
       "        <tr>\n",
       "\n",
       "            <td>\n",
       "\n",
       "                <div>\n",
       "                    <img src=\"\">\n",
       "                </div>\n",
       "            </td>\n",
       "        </tr>\n",
       "        \n",
       "        \n",
       "        <tr>\n",
       "            <td>\n",
       "                <div>\n",
       "                    <img src=\"\">\n",
       "                </div>\n",
       "\n",
       "            </td>\n",
       "\n",
       "        </tr>\n",
       "        \n",
       "        \n",
       "        \n",
       "        \n",
       "        \n",
       "        \n",
       "\n",
       "    </table>\n",
       "</div><style>\n",
       "    .data_type {\n",
       "        font-size: 0.8em;\n",
       "        font-weight: normal;\n",
       "    }\n",
       "\n",
       "    .column_name {\n",
       "        font-size: 1.2em;\n",
       "    }\n",
       "\n",
       "    .info_items {\n",
       "        margin: 10px 0;\n",
       "        font-size: 0.8em;\n",
       "    }\n",
       "\n",
       "    .optimus_table tr:nth-child(even) {\n",
       "        background-color: #f2f2f2 !important;\n",
       "    }\n",
       "\n",
       "    .optimus_table tr:nth-child(odd) {\n",
       "        background-color: #ffffff !important;\n",
       "    }\n",
       "\n",
       "    .optimus_table thead {\n",
       "        border-bottom: 1px solid black;\n",
       "    }\n",
       "    .optimus_table{\n",
       "        font-size: 12px;\n",
       "    }\n",
       "\n",
       "</style>\n",
       "\n",
       "\n",
       "\n",
       "\n",
       "<div class=\"info_items\">Viewing 10 of 32.4 million rows / 4 columns</div>\n",
       "<div class=\"info_items\">8 partition(s)</div>\n",
       "\n",
       "<table class=\"optimus_table\">\n",
       "    <thead>\n",
       "    <tr>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">order_id</div>\n",
       "            <div class=\"data_type\">1 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">product_id</div>\n",
       "            <div class=\"data_type\">2 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">add_to_cart_order</div>\n",
       "            <div class=\"data_type\">3 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">reordered</div>\n",
       "            <div class=\"data_type\">4 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "    </tr>\n",
       "\n",
       "    </thead>\n",
       "    <tbody>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            33120\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            28985\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9327\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            45918\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            30035\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            17794\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            40141\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1819\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            8\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            43668\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            33754\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    </tbody>\n",
       "</table>\n",
       "\n",
       "<div class=\"info_items\">Viewing 10 of 32.4 million rows / 4 columns</div>\n",
       "<div class=\"info_items\">8 partition(s)</div>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "run() executed in 186.8 sec\n"
     ]
    }
   ],
   "source": [
    "# Profile the order_id column of df; the report and the run() timing are in this cell's output.\n",
    "# NOTE(review): relative_error=1 is presumably forwarded to Spark's approximate\n",
    "# quantile computation -- TODO confirm against the Optimus profiler source.\n",
    "# In the saved report every quantile (5th pct, Q1, median, Q3, 95th pct) is 2.0,\n",
    "# i.e. the column minimum, so IQR and MAD are 0.0 even though the mean is ~1.71M.\n",
    "# Read the quantile table as a speed-over-accuracy artifact, not as real quantiles.\n",
    "op.profiler.run(df, \"order_id\", infer=False, relative_error=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing column 'order_id'...\n",
      "_count_data_types() executed in 21.72 sec\n",
      "count_data_types() executed in 21.72 sec\n",
      "cast_columns() executed in 0.01 sec\n",
      "_exprs() executed in 17.72 sec\n",
      "general_stats() executed in 17.73 sec\n",
      "------------------------------\n",
      "Processing column 'order_id'...\n",
      "frequency() executed in 25.8 sec\n",
      "stats_by_column() executed in 9.99 sec\n",
      "percentile() executed in 13.46 sec\n",
      "extra_numeric_stats() executed in 39.63 sec\n",
      "bucketizer() executed in 0.3 sec\n",
      "hist() executed in 14.25 sec\n",
      "dataset_info() executed in 22.55 sec\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style>\n",
       "    .main{\n",
       "        width:100%;\n",
       "        overflow:auto;\n",
       "        border-bottom:1px solid #eeeeee;\n",
       "        padding: 10px 0;\n",
       "    }\n",
       "    .panel_profiler{\n",
       "        margin-right:2%;\n",
       "        float:left;\n",
       "        padding-bottom:2%;\n",
       "    }\n",
       "    .title_profiler{\n",
       "        padding:20px;\n",
       "        background-color: #eeeeee\n",
       "    }\n",
       "    .info{\n",
       "        overflow: auto\n",
       "    }\n",
       "\n",
       "\n",
       "</style>\n",
       "\n",
       "<div class=\"title_profiler\">\n",
       "    <h1>Overview</h1>\n",
       "</div>\n",
       "<div class=\"main\">\n",
       "    <div class=\"panel_profiler\">\n",
       "        <h2>Dataset info</h2>\n",
       "        <table>\n",
       "            <tbody>\n",
       "            <tr>\n",
       "                <td>Number of columns</td>\n",
       "                <td>4</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Number of rows</td>\n",
       "                <td>32434489</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Total Missing (%)</td>\n",
       "                <td>0.0%</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Total size in memory</td>\n",
       "                <td>8.3 MB</td>\n",
       "\n",
       "            </tr>\n",
       "            </tbody>\n",
       "        </table>\n",
       "    </div>\n",
       "    <div class=\"panel_profiler\">\n",
       "        <h2>Column types</h2>\n",
       "        <table>\n",
       "            <tbody>\n",
       "            <tr>\n",
       "                <td>String</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Numeric</td>\n",
       "                <td>1</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Date</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Bool</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "             <tr>\n",
       "                <td>Array</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "            <tr>\n",
       "                <td>Not available</td>\n",
       "                <td>0</td>\n",
       "\n",
       "            </tr>\n",
       "            </tbody>\n",
       "        </table>\n",
       "    </div>\n",
       "</div><style>\n",
       "    .main{\n",
       "        width:100%;\n",
       "        overflow:auto;\n",
       "        border-bottom:1px solid #eeeeee;\n",
       "        padding: 10px 0;\n",
       "    }\n",
       "    .panel_profiler{\n",
       "        margin-right:2%;\n",
       "        float:left;\n",
       "        padding-bottom:2%;\n",
       "    }\n",
       "    .title_profiler{\n",
       "        padding:20px;\n",
       "        background-color: #eeeeee\n",
       "    }\n",
       "    .info{\n",
       "        overflow: auto\n",
       "    }\n",
       "\n",
       "\n",
       "\n",
       "</style>\n",
       "\n",
       "<div class=\"main\">\n",
       "    <div class=\"info\">\n",
       "\n",
       "        \n",
       "\n",
       "        <div class=\"panel_profiler\">\n",
       "            <div>\n",
       "                <h2>order_id</h2>\n",
       "                <div>numeric</div>\n",
       "            </div>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>Unique</td>\n",
       "                    <td> 3025302</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Unique (%)</td>\n",
       "                    <td> 9.327</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Missing</td>\n",
       "                    <td>0.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Missing (%)</td>\n",
       "                    <td>0</td>\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "            <div>\n",
       "                <h3>\n",
       "                    Datatypes\n",
       "                </h3>\n",
       "            </div>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        String\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Integer\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        32434489\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Float\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Bool\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Date\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Missing\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>\n",
       "                        Null\n",
       "                    </td>\n",
       "                    <td>\n",
       "                        0\n",
       "                    </td>\n",
       "\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "            \n",
       "            <div>\n",
       "                <h3>\n",
       "                    Basic Stats\n",
       "                </h3>\n",
       "\n",
       "            </div>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>Mean</td>\n",
       "                    <td>1710748.5189427834</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Minimum</td>\n",
       "                    <td>2</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Maximum</td>\n",
       "                    <td>3421083</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Zeros(%)</td>\n",
       "                    <td>0</td>\n",
       "                </tr>\n",
       "\n",
       "                </tbody>\n",
       "            </table>\n",
       "            \n",
       "\n",
       "        </div>\n",
       "        <div class=\"panel_profiler\">\n",
       "            <h3>Frequency</h3>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <th>Value</th>\n",
       "                <th>Count</th>\n",
       "                <th>Frequency (%)</th>\n",
       "                \n",
       "                <tr>\n",
       "                    <td>1564244</td>\n",
       "                    <td>145</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>790903</td>\n",
       "                    <td>137</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>61355</td>\n",
       "                    <td>127</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2970392</td>\n",
       "                    <td>121</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2069920</td>\n",
       "                    <td>116</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>3308010</td>\n",
       "                    <td>115</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2753324</td>\n",
       "                    <td>114</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2499774</td>\n",
       "                    <td>112</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>2621625</td>\n",
       "                    <td>109</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>77151</td>\n",
       "                    <td>109</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "\n",
       "                \n",
       "                <tr>\n",
       "                    <td>\"Missing\"</td>\n",
       "                    <td>0</td>\n",
       "                    <td>0.0%</td>\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </div>\n",
       "        \n",
       "\n",
       "        \n",
       "        <div class=\"panel_profiler\">\n",
       "\n",
       "\n",
       "            <h3>Quantile statistics</h3>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>Minimum</td>\n",
       "                    <td>2</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>5-th percentile</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Q1</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Median</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Q3</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>95-th percentile</td>\n",
       "                    <td>2.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Maximum</td>\n",
       "                    <td>3421083</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Range</td>\n",
       "                    <td>3421081</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Interquartile range</td>\n",
       "                    <td>0.0</td>\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </div>\n",
       "        <div class=\"panel_profiler\">\n",
       "            <h3>Descriptive statistics</h3>\n",
       "            <table>\n",
       "                <tbody>\n",
       "                <tr>\n",
       "                    <td>Standard deviation</td>\n",
       "                    <td>987300.6964529774</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Coef of variation</td>\n",
       "                    <td>0.57712</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Kurtosis</td>\n",
       "                    <td>-1.199128348852751</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Mean</td>\n",
       "                    <td>1710748.5189427834</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>MAD</td>\n",
       "                    <td>0.0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Skewness</td>\n",
       "                    <td>0</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Sum</td>\n",
       "                    <td>55487254019416</td>\n",
       "                </tr>\n",
       "                <tr>\n",
       "                    <td>Variance</td>\n",
       "                    <td>974762665216.534</td>\n",
       "                </tr>\n",
       "                </tbody>\n",
       "            </table>\n",
       "        </div>\n",
       "        \n",
       "    </div>\n",
       "    <table>\n",
       "        \n",
       "        <tr>\n",
       "\n",
       "            <td>\n",
       "\n",
       "                <div>\n",
       "                    <img src=\"\">\n",
       "                </div>\n",
       "            </td>\n",
       "        </tr>\n",
       "        \n",
       "        \n",
       "        <tr>\n",
       "            <td>\n",
       "                <div>\n",
       "                    <img src=\"\">\n",
       "                </div>\n",
       "\n",
       "            </td>\n",
       "\n",
       "        </tr>\n",
       "        \n",
       "        \n",
       "        \n",
       "        \n",
       "        \n",
       "        \n",
       "\n",
       "    </table>\n",
       "</div><style>\n",
       "    .data_type {\n",
       "        font-size: 0.8em;\n",
       "        font-weight: normal;\n",
       "    }\n",
       "\n",
       "    .column_name {\n",
       "        font-size: 1.2em;\n",
       "    }\n",
       "\n",
       "    .info_items {\n",
       "        margin: 10px 0;\n",
       "        font-size: 0.8em;\n",
       "    }\n",
       "\n",
       "    .optimus_table tr:nth-child(even) {\n",
       "        background-color: #f2f2f2 !important;\n",
       "    }\n",
       "\n",
       "    .optimus_table tr:nth-child(odd) {\n",
       "        background-color: #ffffff !important;\n",
       "    }\n",
       "\n",
       "    .optimus_table thead {\n",
       "        border-bottom: 1px solid black;\n",
       "    }\n",
       "    .optimus_table{\n",
       "        font-size: 12px;\n",
       "    }\n",
       "\n",
       "</style>\n",
       "\n",
       "\n",
       "\n",
       "\n",
       "<div class=\"info_items\">Viewing 10 of 32.4 million rows / 4 columns</div>\n",
       "<div class=\"info_items\">8 partition(s)</div>\n",
       "\n",
       "<table class=\"optimus_table\">\n",
       "    <thead>\n",
       "    <tr>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">order_id</div>\n",
       "            <div class=\"data_type\">1 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">product_id</div>\n",
       "            <div class=\"data_type\">2 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">add_to_cart_order</div>\n",
       "            <div class=\"data_type\">3 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "        <th>\n",
       "            <div class=\"column_name\">reordered</div>\n",
       "            <div class=\"data_type\">4 (int)</div>\n",
       "            <div class=\"data_type\">\n",
       "                \n",
       "                nullable\n",
       "                \n",
       "            </div>\n",
       "        </th>\n",
       "        \n",
       "    </tr>\n",
       "\n",
       "    </thead>\n",
       "    <tbody>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            33120\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            28985\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9327\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            45918\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            4\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            30035\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            5\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            17794\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            6\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            40141\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            7\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1819\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            8\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            2\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            43668\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            9\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            0\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    <tr>\n",
       "        \n",
       "        <td>\n",
       "            3\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            33754\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "        <td>\n",
       "            1\n",
       "        </td>\n",
       "        \n",
       "    </tr>\n",
       "    \n",
       "    </tbody>\n",
       "</table>\n",
       "\n",
       "<div class=\"info_items\">Viewing 10 of 32.4 million rows / 4 columns</div>\n",
       "<div class=\"info_items\">8 partition(s)</div>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "run() executed in 199.09 sec\n"
     ]
    }
   ],
   "source": [
    "op.profiler.run(df, \"order_id\", infer=True, relative_error=1)"
   ]
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "python3"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "nteract": {
   "version": "0.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
